from plyny.collections2.slice import convert_args, countedslice
from plyny.presentation_table import Presentation
from plyny.workstreams.paths import IterationWorkstreamPath, _BasePath, WorkstreamPath
from plyny.workstreams.config import usage, config
from plyny.collections2.bigchain import bigchain, retaining_bigchain
import util.labels as labels
from plyny.table import KnownLengthStreamingTable, StreamStreamTable, StreamingTable, _IterableData, DataTable

from plyny.plio.files import Path, Directory, File

import plyny.util.jsonloader as json

from itertools import islice
import atexit
import os
import sys


# Todo:
#     KnownLengthStreamingTable and WorkstreamReader are a total mess.
#     any alternative to 'close' for length?
#     spooling or threaded writes
#     should always be sorted by some key?
#     do something about open -> silent fail
#     make write_only & stream defaults
#     what does _create do here? problems with reopening stream


class FileInputStream(bigchain, _IterableData):
    """Readable stream over a file of newline-delimited JSON records.

    Each line of the underlying File is decoded with json.loads on
    iteration; slice() supports counted slicing when a total length is
    known.
    """

    def __init__(self, fo, length=None):
        # fo: a plyny File containing one JSON document per line.
        # length: total record count if known (lets convert_args resolve
        # open-ended/negative slice bounds); may be None.
        if not isinstance(fo, File):
            raise TypeError('Requires File (received %s)' % type(fo))

        self.fo = fo
        # NOTE(review): hands an iterator over *self* to the bigchain base
        # before __init__ completes -- relies on bigchain consuming it
        # lazily. Confirm against bigchain's implementation.
        bigchain.__init__(self, iter(self))  # weird, but works
        _IterableData.__init__(self)
        self._length = length

    def slice(self, *args):
        # Counted slice over the stripped lines, wrapped so each line is
        # JSON-decoded on the way out.
        return self._wrap_iter(countedslice(self.fo.stripped(), *convert_args(*args, length=self._length)))

    def _wrap_iter(self, itr):
        # Decode each raw line to a Python value, chained lazily.
        return bigchain(json.loads(x) for x in itr)

    def __iter__(self):
        return iter(self._wrap_iter(self.fo.stripped()))


class FileOutputStream(_IterableData):
    """Write-side counterpart of FileInputStream.

    Serialises values as newline-delimited JSON; set_cache() enables
    batching of single-record appends.
    """

    def __init__(self, fo):
        # fo: a plyny File; its writer() is opened immediately.
        _IterableData.__init__(self)
        self._cache = []         # values buffered by append() when caching
        self._cache_size = None  # batching disabled until set_cache()
        if not isinstance(fo, File):
            raise TypeError('Requires File (received %s)' % type(fo))

        self._len = 0            # records written so far (persisted on close)
        self.fo = fo.writer()

    def set_cache(self, cache_size):
        """Enable batching: buffer appends until cache_size are pending."""
        self._cache_size = cache_size

    def _flush(self, force=False):
        # Drain the buffer when forced, or when it reached the configured
        # size. Swap the list out first so _write sees a stable snapshot.
        if force or len(self._cache) >= self._cache_size:
            pending = self._cache
            self._cache = []
            self._write(pending)

    def append(self, values):
        """Write one record (buffered when caching is enabled)."""
        if self._cache_size is not None:
            self._cache.append(values)
            self._flush()
        else:
            self._write([values])

    def extend(self, values):
        """Write many records.

        Fix: flush any buffered appends first, so records reach the file
        in the order they were given. Previously a direct extend() could
        overtake values still sitting in the append buffer.
        """
        if self._cache:
            self._flush(True)
        self._write(values)

    def _write(self, values):
        # Serialise and write each value, counting records for close().
        for v in values:
            self.fo.write(json.dumps(v) + '\n')
            self._len += 1

    def __del__(self):
        self.close()

    def close(self):
        """Flush buffered records and close the underlying writer."""
        self._flush(True)
        if hasattr(self, 'fo'):
            # XXX 'fo' is missing when __init__ raised before opening the
            # writer; the flush above is then a no-op on the empty cache.
            self.fo.close()


class _workstream_itr(retaining_bigchain):
    """Lazily evaluated, option-carrying iterator over workstream items.

    Wraps ``outer`` (e.g. Manager._remainder). Modifier methods such as
    force()/stream() return a shallow copy with the corresponding
    ``_ex_*`` flag set, so options can be chained before iteration.
    ``outer`` is only invoked on first iteration.
    """

    def __init__(self, outer, *other_names):
        super(_workstream_itr, self).__init__()
        self.outer = outer              # callable producing the values
        self.other_names = other_names  # source names forwarded to outer()
        # Option flags handed to outer() on first iteration.
        self._ex_force = False
        self._ex_existing_only = False
        self._ex_stream = False
        self._ex_tolerate_missing = False
        self._ex_require = []
        self.run_yet = False            # True once outer() has been evaluated

    def require(self, *sources):
        """Variant requiring items to be backed by the given sources."""
        return self._itr_variant('_ex_require', sources)

    def force(self):
        """Variant that reprocesses items even if already done."""
        return self._itr_variant('_ex_force')

    def existing_only(self):
        """Variant restricted to already-existing items."""
        return self._itr_variant('_ex_existing_only')

    def stream(self):
        """Variant that opens readers in streaming mode."""
        return self._itr_variant('_ex_stream')

    def tolerate_missing(self):
        """Variant that accepts items missing from some sources."""
        return self._itr_variant('_ex_tolerate_missing')

    def _itr_variant(self, variant, value=True):
        # Shallow-copy self and flip a single option on the copy.
        # NOTE(review): copy.copy shares any mutable state held by the
        # retaining_bigchain base (and run_yet results) between variants --
        # confirm this sharing is intended.
        import copy
        c = copy.copy(self)
        c.outer = self.outer
        setattr(c, variant, value)
        return c

    def __str__(self):
        return str(list(self))

    def __iter__(self):
        # Evaluate outer() exactly once, retaining its output in the
        # bigchain base so repeated iteration replays the same values.
        if not self.run_yet:
            values = self.outer(self._ex_force, self._ex_existing_only,
                    self._ex_stream, self._ex_tolerate_missing,
                    self._ex_require, *self.other_names)

            self.extend(values)

            self.run_yet = True

        return super(_workstream_itr, self).__iter__()


class Manager(object):
    """Entry point for reading and writing workstream directories.

    A Manager wraps a Directory and exposes companion views of itself
    (``compress``, ``write_only``, ``stream``) sharing the same tree with
    different streaming/compression flags. Written in Python 2 (uses
    ``unicode``, ``print`` statements, the ``new`` module).
    """

    def __init__(self, dirname=None, streaming=False, compress=False,
            origin=None, process=None):

        # Default to a throwaway directory when none is given.
        if dirname is None:
            from plyny.plio.files import TemporaryDirectory
            dirname = TemporaryDirectory()

        if not isinstance(dirname, Directory):
            dirname = Directory(dirname)

        self._dir = dirname

        if process is None:
            process = Manager.current_process_workstream()

        # Root managers (no origin) create workstreams under a
        # per-process subdirectory; derived views write in place.
        if origin is None:
            creator = self._dir.subdirectory(process)
        else:
            creator = self._dir

        self._creator = creator

        if origin is None:
            origin = self

        self._origin = origin
        self._streaming = streaming
        self._use_compress = compress
        # Companion view writing gzip-compressed output.
        if not self._use_compress:
            self.compress = Manager(self._creator, streaming=self._streaming,
                    compress=True, origin=self._origin)

        # A streaming manager is its own write_only/stream view;
        # otherwise build streaming companions.
        if self._streaming:
            self.write_only = self
            self.stream = self
        else:
            self.write_only = Manager(self._creator, streaming=True,
                    compress=self._use_compress, origin=self._origin)
            self.stream = Manager(self._dir, streaming=True,
                    compress=self._use_compress, origin=None)

        self._revision = None
        self._assign_datatable()

        self._WorkstreamReaderClass = WorkstreamReader  # configurable for unit test override

    def open_current(self):
        """Open the workstream named after the running process."""
        return self.open(self.__class__.current_process_workstream())

    @staticmethod
    def current_process_workstream():
        # Derive a workstream name from the script path: slashes become
        # colons and the file extension is dropped.
        process = sys.argv[0].replace('/', ':')
        process = '.'.join(process.split('.')[:-1])
        return process

    def get_path(self, path=''):
        """Resolve a relative path inside this manager's directory."""
        if path.startswith('/'):
            raise ValueError('%s not relative' % path)

        return WorkstreamPath.create_with_path(self._dir, path, self)

    @property
    def scm_revision(self):
        # Revision recorded into each workstream's config (see
        # _assign_datatable).
        return self._revision

    @scm_revision.setter
    def scm_revision(self, revision):
        self._revision = revision

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, self.name())

    def names_minus_substream(self, path=None):
        """Yield (relative name, path) pairs for workstreams under path.

        The ``%`` on the path object appears to compute the path relative
        to ``name`` -- confirm against WorkstreamPath semantics.
        """
        if path is None:
            name = unicode(self.name())
            path = WorkstreamPath.create_with_path(self._origin._dir, name, self)
        else:
            name = path
            path = WorkstreamPath.create_with_path(self._origin._dir, path, self)

        paths = path.workstreams()
        for x in paths:
            yield unicode(x._path % Path(name)), x
#
#            a, b = x.substream_as_path().abs(), Path(name).abs()
#            print a, b
#            if not unicode(a).startswith(unicode(b)):
#                continue
#
#            y = unicode(a % b)
#            if y == '.':
#                continue
#
#            yield y, x

    def remainder(self, *other_names):
        """Lazy iterator over inputs not yet processed by this process."""
        return _workstream_itr(self._remainder, *other_names)

    def _remainder(self, force_all, existing_only, stream, tolerate_missing,
            require, *other_names):
        # Set difference: names present in the other sources minus names
        # this process has already produced. The _ex_* options are
        # accepted but not applied here (unlike _remainder2).

        others = []
        for other_name in other_names:
            others += WorkstreamPath.create_with_path(self._origin._dir, other_name, self).workstreams()

        # NOTE(review): passes the *method* current_process_workstream
        # rather than its result -- looks like a missing () call; confirm
        # what create_with_path does with a non-string path.
        selfs = WorkstreamPath.create_with_path(self._origin._dir, self.current_process_workstream, self).workstreams()
        names = set(item.current().name() for item in others)
        done = set(item.current().name() for item in selfs)
        return names - done

    def _remainder2(self, force_all, existing_only, stream, tolerate_missing,
            require, *other_names):
        # Experimental variant of _remainder that honours the option
        # flags and yields (readers, substream) pairs.
        # NOTE(review): likely broken as written -- below,
        # ``sb = w.substreams()`` is a *list* and is then used as a dict
        # key (unhashable), and the debug print suggests work in progress.

        name = unicode(self.name())
        path = WorkstreamPath.create_with_path(self._origin._dir, name, self)
        others = []
        for other_name in other_names:
            others += WorkstreamPath.create_with_path(self._origin._dir,
                    other_name, self).workstreams()

        we_have_done = []
        we_have_doneh = {}

        for s in path.substreams():
            for w in s.workstreams():
                sb = w.substreams()[-1]
                we_have_done.append(sb)
                we_have_doneh.setdefault(sb, []).append(w)

        they_have_done = []
        they_have_doneh = {}

        for s in others:
            for w in s.workstreams():
                sb = w.substreams()
                they_have_done.append('/'.join(sb))
                they_have_doneh.setdefault(sb, []).append(w)

        print 'XX', we_have_done, they_have_done

        # Keep only substreams covered by every source (or any source,
        # when tolerate_missing) that also satisfy the require prefixes.
        they_have_done = set()
        for sb, values in they_have_doneh.iteritems():
            if len(values) == len(other_names) or tolerate_missing:
                requirement_satisfied = True
                if require:
                    for r in require:
                        for v in values:
                            if unicode(v).startswith(unicode(r)):
                                break
                        else:
                            requirement_satisfied = False
                            break

                if requirement_satisfied:
                    they_have_done.add(sb)

        exists = set(they_have_done)
        done = set(we_have_done)

        todo = set()

        if force_all:
            todo = exists
        else:
            for i in exists - done:
                todo.add(i)

#            try:
#                for i in exists.intersection(done):
#                    if i in item and item[i].date() > a[i].date():
#                        todo.add(i)
#            except Exception as e:
#                print e

        for i in sorted(todo):
            me = self.substream(i)
            # XXX hack
            yield [self._WorkstreamReaderClass(x, stream=stream) for x in they_have_doneh[i]], me

    def _assign_datatable(self):
        # We want to be able to construct DataTable that have their own
        # reference to a Workstream, without, of course, requiring the user to
        # supply this every time

        # __new__ override: constructing self.DataTable(*columns) builds a
        # WorkstreamWriter bound to a freshly created workstream file.
        def x(cls, *values):
            w = WorkstreamWriter(self._create_workstream_file(cls._dir,
                compress=self._use_compress), self._streaming, *values)
            if self._revision is not None:
                w.assign_scm_revision(self._revision)
            return w

        # Python 2 only: 'new.classobj' builds the class dynamically.
        import new
        self.DataTable = new.classobj('_SWT', (WorkstreamWriter,),
            {'_dir': self._creator,
             '__new__': x
            })

    def name(self):
        # Creator directory relative to the origin's root.
        return unicode(self._creator.path() % self._origin._dir.path())

    def substream(self, name=None):
        """Return a child Manager; auto-names it when name is None."""
        if name is None:
            length = len(list(self.get_path().workstreams()))
            name = labels.get_next(length)

        return Manager(self._creator.subdirectory(name),
                streaming=self._streaming, compress=self._use_compress,
                origin=self._origin)

    def dump(self, dt):
        """Persist an existing table's rows as a new workstream."""
        w = self.DataTable(*dt.column_names)
        w.extend(dt)

    def date(self, ws, iteration=-1):
        """Date(s) of the named workstream; a scalar when unique."""
        paths = WorkstreamPath.create_with_path(self._dir, ws, self).workstreams()
        dates = []
        for item in paths:
            dates.append(item.date())

        if len(dates) == 1:
            return dates[0]

        return dates

    def exists(self, ws):
        """True when every path of the named workstream exists."""
        try:
            paths = WorkstreamPath.create_with_path(self._dir, ws, self).workstreams()
            for item in paths:
                if not item.exists():
                    return False
        except (OSError, IOError):
            return False

        return True

    def list(self, ws=None):
        # Shadows the builtin 'list' as a method name (callers use
        # manager.list(...)).
        if ws is None:
            ws = ''

        return WorkstreamPath.create_with_path(self._dir, ws, self)

    def read(self, path=None):
        """Open without merging multiple matches into one table."""
        return self.open(path, merge=False)

    def open(self, path=None, iteration=None, merge=True, stream=False):
        """Open workstream(s) at path as reader table(s).

        Falls back to a '.gz' sibling when the plain file is missing;
        merges multiple matches into one connected table unless
        merge=False. Returns None when nothing matched.
        """
        if path == '-':
            raise NotImplementedError()

        if path is None:
            path = ''

        paths = []

        # Delegate to the streaming companion view when asked to stream.
        if stream and not self._streaming:
            return self.stream.open(path)

        if not isinstance(path, list):
            path = [path]

        for p in path:
            if not isinstance(p, _BasePath):
                p = self.get_path(p)
            cur = p.current()
            if not isinstance(cur, list):
                cur = [cur]
            for c in cur:
                paths.append(c)  # XXX hack -> assumes 'a' always

        files = []
        for x in paths:
            pth = x.file_path()
            if not pth.exists():
                # Fall back to the compressed variant before giving up.
                pth = pth.add_extension('.gz')
                if not pth.exists():
                    raise OSError('%s not found' % x.file_path())

            pth = IterationWorkstreamPath.create_with_abspath(self._dir, pth, self)
            files.append(self._WorkstreamReaderClass(pth, self._streaming))

        if not files:
            return None

        # Record a usage hit on every opened workstream.
        for f in files:
            f.usage().add_usage()

        if merge:
            if len(files) == 1:
                return files[0]
            files = files[0].connect(*files)

        return files

    def _start_time(self):
        # Timestamp of this process's start, used as the workstream
        # directory name so reruns get distinct directories.
        # NOTE(review): psutil >= 2.0 makes create_time a *method*; this
        # attribute access only works on old psutil -- confirm the pinned
        # version.
        import psutil
        import time
        p = psutil.Process(os.getpid())
        return time.strftime('%Y.%m.%d.%H.%M.%S',
                time.localtime(p.create_time))

    def _create_workstream_file(self, directory, compress=False):
        """Create the on-disk layout for a new workstream file.

        Layout: ws/process_name[/tag]/<timestamp>/output[.gz], with a
        'current' symlink pointing at the newest timestamp directory.
        """
        if not isinstance(directory, Directory):
            directory = Directory(directory)

        directory.create_unless_exists()

        activedir = directory

        filename = self._start_time()

        # ws/process_name[/tag]/ts
        current = Directory.from_dir(activedir, filename)

        if not current.exists():
            current.create()
    #        from plyny.plio.files import FileWriter
    #        from plyny.io.output import Tee
    #        Tee(FileWriter(current / '.output'))

        link_to = current

        count = len(current.list_names().ignore_dot())

        filename = 'output'
    #    filename = labels.get_next(count)
        if compress:
            filename = filename + '.gz'

        # ws/process_name[/tag]/ts/[a-z]
        current = File.from_dir(current, filename)

        # Repoint the 'current' symlink at the new timestamp directory.
        f = File.from_dir(activedir, 'current')
        if f.exists():
            f.remove()

        os.symlink(link_to.path().basename(), str(f.path()))

        return WorkstreamPath.create_with_abspath(self._dir, current.path(), self)


#if __name__ == '__main__':
#    from easyconfig import Config
#    c = Config()
#
#    with c.parser() as p:
#        pass
#
##    print c.workstreams()._get_inner_paths('/path/to/a,b/and/c')


def json_parse2(x, depth=0):
    """Hand-rolled parser for a JSON-style list literal.

    Parses strings like '[1, 2.5, "ab", [3, 4]]' into a (possibly
    nested) list of int/float/str values. Assumes x starts with '[';
    x[0] is deliberately skipped by the scan below.

    Fixes vs the original: nested lists recursed into an undefined
    'splitter' helper (NameError) -- they now recurse into json_parse2
    itself, and the recursive slice includes the closing ']' so the last
    nested element is not dropped.

    NOTE(review): string elements keep their surrounding double quotes
    (matching the original flush logic) -- confirm callers expect that.
    depth is threaded through recursion but otherwise unused.
    """
    res = []
    instr = False     # currently inside a double-quoted string
    isfloat = False   # current token contains a '.'
    isstr = False     # current token contains a quote
    curstart = None   # index where the current token begins, or None

    i = 0
    lenx = len(x)
    while i + 1 < lenx:
        i += 1
        k = x[i]
        if k == '[':
            # Nested list: scan for the matching ']' and recurse on the
            # whole sub-list, including its closing bracket.
            to_match = 1
            for l in range(i + 1, lenx):
                xi = x[l]
                if xi == '[':
                    to_match += 1
                elif xi == ']':
                    to_match -= 1

                if to_match == 0:
                    break
            res.append(json_parse2(x[i:l + 1], depth + 1))
            curstart = None
            i = l  # the loop's i += 1 then steps past the ']'
            continue
        if k == '"':
            instr = not instr
            if instr:
                isstr = True
        elif not instr:
            if k == ',' or k == ']':
                # Delimiter outside a string: flush the pending token.
                if curstart is not None:
                    s = x[curstart:i]
                    if isfloat:
                        res.append(float(s))
                    elif not isstr:
                        res.append(int(s))
                    else:
                        res.append(s)

                isfloat = False
                isstr = False
                curstart = None
                continue
            elif k == ' ':
                # Whitespace between tokens is ignored.
                continue

        if not instr and k == '.':
            isfloat = True

        if curstart is None:
            curstart = i

    return res


def json_parse(s):
    print s
    nobrackets = s[:-1]
    vals = []
    toks = nobrackets.split(',')
    cur = 0
    l = len(toks)
    append = vals.append

    while cur < l:
        x = toks[cur]
        x = x[1:]
        if x[0] is '[':
            y = None
            cur += 1
            txt = [x]
            while y is None or y[-1] != ']':
                y = toks[cur]
                txt.append(', ' + y)
                cur += 1
            append(parse(''.join(txt)))
        elif x[0] == 'u':
            append(unicode(x[2:-1]))
        elif x[0] == '"':
            append(x[1:-1])
        elif x[0] is 'n':
            append(None)
        elif '.' in x:
            append(float(x))
        else:
            append(int(x))

        cur += 1

    return vals


class _WorkstreamIO(object):
    def __init__(self, path):
        if not isinstance(path, _BasePath):
            raise TypeError('paths not WorkstreamPath (received %s)' %
                    type(path))

        self._path = path
        self._config = None
        self._usage = None

    def assign_scm_revision(self, revision):
        self.config().scm_revision = revision

    @property
    def column_names(self):
        return tuple(self.config().column_names)

    def config(self):
        return self._get_config('config', config)

    def __len__(self):
        return self.config().length

    def usage(self):
        return self._get_config('usage', usage)

    def _get_config(self, name, cons):
        v = getattr(self, '_%s' % name)
        if v is None:
            path = self._path
            if not isinstance(path, Path):
                path = self._path.file_path()

            p = Path(path.branch()) / ('%s' % name)
            setattr(self, '_%s' % name, cons(p))

        v = getattr(self, '_%s' % name)
        return v

    def path(self):
        return self._path


class WorkstreamReader(DataTable, Presentation, _WorkstreamIO):
    """Read-only table view over a persisted workstream file."""

    def __init__(self, path, stream=False):
        # path: workstream path whose file_path() holds the JSON-lines data.
        # stream: when True, empty_copy()/_create() produce streaming tables.
        fo = File(path.file_path())

        DataTable.__init__(self)
        Presentation.__init__(self)
        _WorkstreamIO.__init__(self, path)
        self.stream = stream
        self._column_names = self.config().column_names
        # Cache the persisted row count; __len__ returns this rather than
        # re-reading the config sidecar.
        self.length = _WorkstreamIO.__len__(self)
        self._data = FileInputStream(fo, self.length)
        self.tolerate_zip_errors = False  # XXX

    def slice(self, *args):
        """Return a table over the rows selected by slice(*args).

        The ``slice(*args)`` call below resolves to the *builtin* slice
        type, not this method -- plain name lookup inside a method skips
        the class namespace.
        """
        s = slice(*args)
        stop = s.stop
        if stop is None:
            stop = len(self)
        stop = min(stop, len(self))
        # xrange (Python 2) is used only to compute the resulting row count.
        x = xrange(s.start or 0, stop, s.step or 1)

        return KnownLengthStreamingTable(len(x),
                *self.column_names).extend(self._data.slice(*args))

    @classmethod
    def connect(cls, *tables):
        """Concatenate several readers into one streaming table."""
        return StreamStreamTable(*tables)

    def tape(self, other):
        # Append 'other' after self into a fresh table of the same shape.
        d = self.empty_copy()
        d = d.tape(self).tape(other)
        return d

    def __len__(self):
        return self.length

    def empty_copy(self):
        """An empty table with the same columns (kind depends on stream)."""
        d = self._create(*self.column_names)
        return d

    def _create(self, *column_names):
        # Streaming mode prefers a length-aware table; falls back to a
        # plain StreamingTable when len(self) is unavailable.
        if self.stream:
            try:
                return KnownLengthStreamingTable(len(self), *column_names)
            except TypeError:
                return StreamingTable(*column_names)
        else:
            return DataTable(*column_names)


class WorkstreamWriter(_WorkstreamIO, DataTable):
    """Table-like writer that persists rows to a workstream file."""

    def __init__(self, wspath, streaming, *column_names):
        # wspath: workstream path for the output file.
        # streaming: retained flag (mirrors the owning Manager's mode).
        DataTable.__init__(self, *column_names)
        _WorkstreamIO.__init__(self, wspath)

        self._data = FileOutputStream(File(wspath.file_path()))
        self._streaming = streaming
        # Record the schema and the invoking command line in the config
        # sidecar up front.
        self.config().column_names = column_names
        self.config().args = sys.argv
        # NOTE(review): atexit keeps a reference to self until interpreter
        # exit, so writers are never garbage-collected before then.
        atexit.register(self.close)
        self.closed = False

    def close(self):
        """Persist the final row count and close the file (idempotent)."""
        if self.closed:
            return

        self.config().length = self._data._len
        self._data.close()
        self.closed = True
